#!/usr/bin/perl -w
#
# Written by Martin Bartosch for the OpenXPKI project 2006
# Copyright (c) 2006-2011 by The OpenXPKI Project
#

use warnings;
use strict;
use English;

use Data::Dumper;

use Pod::Usage;
use Getopt::Long;
use POSIX qw(strftime);
use File::Spec;
use Config::Std;
use Cwd qw(getcwd);

# use Smart::Comments;

my %params;
my %config;


my $svn = 'svn';
my $git = 'git';
my $git_svn = 'git-svn';

my $revision_info;
my $mapping_of;
my $format_of;
my $git_reference_tag_of;
my $saved_directory;

###########################################################################

sub slurp {
    my $filename = shift;

    return unless (defined $filename && -r $filename);

    my $result = do {
       open my $HANDLE, '<', $filename or return;
        if (not $HANDLE) {
            die "Could not open file '$filename' for reading. Stopped";
        }
        # slurp mode
        local $INPUT_RECORD_SEPARATOR;     # long version of $/
        <$HANDLE>;
    };
    return $result;
}


sub get_revision_info {
    my $result = {};

    my $fh;
    # first get SVN revision information
    if (open $fh, "$svn info -R 2>/dev/null |") {
        SVN_INFO:
        while (<$fh>) {
            chomp;
            if (m{ \A Revision: \s* (\d+) }xms) {
                $result->{revision} = $1;
                next SVN_INFO;
            }
            if (m{ \A Last\ Changed\ Rev: \s* (\d+) }xms) {
                if (
                    ! exists $result->{'last-changed-revision'} ||
                    ($1 > $result->{'last-changed-revision'})
                ) {
                    $result->{'last-changed-revision'} = $1;
                }
            }
        }
        close $fh;
    }

    # check if we are running in a git checkout
    # getting revision info from SVN did not work, try git
    if ( (`which $git 2>/dev/null`) && (`$git branch 2>/dev/null`) ) {
        # try via git-svn...
        if (! exists $result->{revision}) {
            # try to get revision from git-svn
            my $last_log = `$git_svn log --oneline --limit 1 2>/dev/null`;
            my ($revision) = ($last_log =~ m{\A r(\d+)\ \|\ .*}xms);
            if (defined $revision) {
                $result->{revision} = $revision;
            }
        }

        # ... and via plain git
        if (! exists $result->{revision}) {
            # revision could not be obtained by git-svn
            # reason may be that this repository does not rebase via git-svn,
            # but rather pulls from another repository. assume that the
            # svn revision information is available in git full text logs.

            open $fh, qq{ $git log --pretty=email | };
            while (my $line = <$fh>) {
                chomp $line;
                # first git-svn-id line encountered wins
                if ($line =~ m{ \A git-svn-id: .*? @(\d+) \s+ }xms) {
                    $result->{revision} = $1;
                    last;
                }
            }
            close $fh;
        }
        # fake last-changed-revision
        $result->{'last-changed-revision'} ||= $result->{revision};

        # obtain current head commit (may even be a detached branch)
        my $head_commit;
        open $fh, qq{ $git rev-list --max-count=1 HEAD | };
        while (my $line = <$fh>) {
            chomp $line;
            $head_commit ||= $line;
        }
        close $fh;

        # obtain current head commit (may even be a detached branch)
        my $abbreviated_head_commit;
        open $fh, qq{ $git rev-list --abbrev-commit --max-count=1 HEAD | };
        while (my $line = <$fh>) {
            chomp $line;
            $abbreviated_head_commit ||= $line;
        }
        close $fh;

        $result->{'git-abbreviated-commit-hash'} = $abbreviated_head_commit;
        $result->{'git-commit-hash'} = $head_commit;

        # obtain current branch ref (branch name)
        my $branch_ref = `$git symbolic-ref HEAD 2>/dev/null`;
        if ($CHILD_ERROR != 0) {
            $branch_ref = '(unnamed branch)';
        }
        chomp $branch_ref;
        $branch_ref =~ s{ refs/heads/ }{}xms;
        $result->{'git-branch'} = $branch_ref;

        # obtain date of current head commit
        my $head_commit_date = `$git show -s --format=%cD HEAD 2>/dev/null`;
        if ($CHILD_ERROR != 0) {
            $head_commit_date = '(unable to get commit date)';
        }
        chomp $head_commit_date;
        $result->{'git-commit-date'} = $head_commit_date;

        # get git tag information
        my @git_tags;

        open $fh, qq{ $git show-ref --tags -d | };
        while (my $line = <$fh>) {
            chomp $line;
            my ($commit, $tag) = ($line =~ m{ \A (\w+) \s+ refs/tags/(.*) }xms);
            if (defined $tag && ($commit eq $head_commit)) {
                $result->{'git-tag'} ||= $tag;
                push @git_tags, $tag;
            }
        }
        close $fh;
        $result->{'git-tags'} = join(', ', @git_tags);

        # get most recent tag information
        foreach my $tag (undef, keys %{ $git_reference_tag_of }) {
            my $tag_mapping;
            if (defined $tag) {
                $tag_mapping = $git_reference_tag_of->{$tag} || $tag;
            }

            my $reftag = '';
            if (defined $tag) {
                $reftag = "--match $tag";
            }

            open $fh, qq{ $git describe --tags $reftag 2>/dev/null | };

            my $description;
            while (my $line = <$fh>) {
                chomp $line;
                $description = $line;
                last;
            }
            my $postfix = '';
            if (defined $tag_mapping) {
                $postfix = '-' . $tag_mapping;
            }
            $result->{'git-description' . $postfix} = $description;
        }
    }

    return $result;
}


sub upwards_find_file {
    my $filename = shift;

    return undef unless defined $filename;

    my $dir = getcwd;

    while ($dir ne '/') {
        if (exists $params{verbose}) {
            print STDERR "* scanning $dir for $filename\n";
        }

        my $absolute_filename = File::Spec->catfile($dir, $filename);
        if (-e $absolute_filename) {
            if (exists $params{verbose}) {
                print STDERR "* found $absolute_filename\n";
            }
            return $absolute_filename;
        }

        # one level up
        my @dirs = File::Spec->splitdir($dir);
        pop @dirs;
        $dir = File::Spec->catdir(@dirs);
    }
    return;
}

sub expand_keyword {
    my $arg = shift;

    my $last_arg = "";
    while (defined $arg && ($last_arg ne $arg)) {
        $last_arg = $arg;
        # NB: reverse sorting keys results in "longer" keywords listed
        # first, avoiding a "greedy" match of shorter but identical patterns
        # (see 'git-tags' vs. 'git-tag')
        foreach my $keyword (reverse sort keys %{$mapping_of->{keyword}}) {
            if ($arg =~ m{ $keyword }xms) {
                my $value = $mapping_of->{keyword}->{$keyword}->{entry}->{value};
                if (! defined $value) {
                    die "Could not get value for keyword '$keyword'. Stopped";
                }
                $arg =~ s{ $keyword }{$value}xmsg;
            }
        }
        $arg = strftime($arg, gmtime(time));
    }

    return $arg;
}

###########################################################################

# make sure that system output is not localized
$ENV{LANG} = 'C';

GetOptions(\%params,
   'help|?',
   'man',
   'verbose',
   'format=s',
   'directory=s',
   'dump',
   'dumpformat=s',
   'list-formats',
   'list-keywords',
   'show-revisioninfo',
) or pod2usage(-verbose => 0);

pod2usage(-exitstatus => 0, -verbose => 2) if $params{man};
pod2usage(-verbose => 1) if ($params{help});

if (defined $params{directory}) {
    $saved_directory = getcwd;
    if (! chdir $params{directory}) {
        die "Could not change to directory $params{directory}. Stopped";
    }
}

my $def_file = upwards_find_file('.VERSION_DEFINITION');

if (! defined $def_file) {
    die "Could not find version definition file anywhere. Stopped";
}
read_config($def_file, %config);

if (exists $config{FORMAT_DEFINITIONS}) {
    $format_of = $config{FORMAT_DEFINITIONS};
    delete $config{FORMAT_DEFINITIONS};
}

if (exists $config{GIT_REFERENCE_TAGS}) {
    $git_reference_tag_of = $config{GIT_REFERENCE_TAGS};
    delete $config{GIT_REFERENCE_TAGS};
}

# try to get revision from git
$revision_info = get_revision_info();

if (! scalar keys %{$revision_info}) {
    # could not get revision from git, check if cached result is available
    print STDERR "NOTE: could not determine revision info from version control\n";
    # determine base directory of VERSION_DEFINITION file
    my ($volume, $dir, $file) = File::Spec->splitpath( $def_file );
    my $revision_file = File::Spec->catfile($dir, '.vergen_revision_state');

    if (-r $revision_file) {
        print STDERR "NOTE: reading cached revision information from $revision_file\n";
        my $data = slurp($revision_file);
        eval $data;
    }
}

if (! scalar keys %{$revision_info}) {
    print STDERR "NOTE: neither git nor .vergen_revision_state was found.\n";
    print STDERR "\n";
    print STDERR "vergen works best when called within a checked out git repository.\n";
    print STDERR "Where this is not possible (e. g. when working on an extracted\n";
    print STDERR "tarball without the .git directory) it is possible to include\n";
    print STDERR "the complete revision information before creation of the tarball\n";
    print STDERR "and archive this file inside the archive.\n\n";
    print STDERR "To do so, do the following in the same directory that contains the\n";
    print STDERR ".VERSION_DEFINITION file:\n\n";
    print STDERR "vergen --show-revisioninfo >.vergen_revision_state\n\n";
    print STDERR "If this file is present (and git is not found) subsequent calls to\n";
    print STDERR "vergen will use the cached information instead.\n\n";
    exit 1;
}



### %config
### $format_of

foreach my $component (keys %config) {
    ### $component...

    # ignore components starting with an underscore (custom config)
    if ($component =~ m{ \A _ }xms) {
        next;
    }

    # coerce keyword into arrayref
    if (defined $config{$component}->{keyword}) {
        if (ref $config{$component}->{keyword} eq '') {
            $config{$component}->{keyword} = [ $config{$component}->{keyword} ];
        }
    } else {
        die "No keyword defined for component $component. Stopped";
    }

    # built-in components
    if ($component =~ m{ \A (?: revision |
                                last-changed-revision |
                                git-branch |
                                git-commit-hash |
                                git-commit-date |
                                git-abbreviated-commit-hash |
                                git-tag |
                                git-tags |
                                git-description.*
                            ) \z }xms) {

        $mapping_of->{component}->{$component} = {
            keyword => $config{$component}->{keyword},
            value   => $revision_info->{$component},
            type    => 'built-in',
        };
        next;
    }

    my $optional = (defined $config{$component}{optional}
                    && ($config{$component}{optional} =~ m{ \A (?:yes|true|1) \z }ixms));
    ### $optional

    if (
        defined $config{$component}{filename}
        && defined $config{$component}{expression}
    ) {
        die "Mutually exclusive sources defined for component '$component'. Stopped";
    }

    my $value;
    if (defined $config{$component}{filename}) {
        my $file = upwards_find_file($config{$component}{filename});

        if (defined $file) {
            $value = slurp($file);
            if (! defined $value) {
                $value = '';
            }
            chomp $value;
        } else {
            if (! $optional) {
                die "Could not find version file '$config{$component}{filename}' for component '$component' upwards from here. Stopped";
            }
            # default: empty
            $value = '';
        }
    }
    if (defined $config{$component}{expression}) {
        # allow perl expressions in configuration
        $value = eval $config{$component}{expression};
        if ($EVAL_ERROR) {
            die "Evaluation error in expression of component '$component':\n" .$EVAL_ERROR;
        }
        # and even anonymous subroutines
        if (ref $value eq 'CODE') {
            $value = &{$value};
        }
    }

    if (! defined $value && ! $optional) {
        die "Could not determine value for mandatory component '$component'. Stopped";
    }

    $mapping_of->{component}->{$component} = {
        keyword => $config{$component}->{keyword},
        value   => $value,
        type    => 'component',
    }
}

# remap entries
foreach my $component (keys %{$mapping_of->{component}}) {
    ### $component
    my $entry = $mapping_of->{component}->{$component};
    foreach my $keyword (@{ $mapping_of->{component}->{$component}->{keyword} }) {
        $mapping_of->{keyword}->{$keyword}->{component} = $component,
        $mapping_of->{keyword}->{$keyword}->{entry} = $entry,
    }
}

### $mapping_of

if (defined $params{format}) {
    my $format = $params{format};

    if (exists $format_of->{$format}) {
        $format = $format_of->{$format};
    }
    my $value = expand_keyword($format);

    eval "printf('%s', qq($value));";
}


if (defined $params{'list-formats'}) {
    print join("\n", sort keys %{$format_of});
    print "\n";
}

if (defined $params{'list-keywords'}) {
    print join("\n", sort keys %{$mapping_of->{keyword}});
    print "\n";
}

if (defined $params{'dump'}) {
    foreach my $component (keys %{$mapping_of->{component}}) {
        my $dumpformat = 'KEYWORD=VALUE\n';
        if (defined $params{dumpformat}) {
            $dumpformat = $params{dumpformat};
        }

        my $keyword = $mapping_of->{component}->{$component}->{keyword}->[0];
        my $value   = expand_keyword($mapping_of->{keyword}->{$keyword}->{entry}->{value});
        my %vars = (
            KEYWORD => $keyword,
            VALUE   => $value,
        );

        my @values = ();

        while (1) {
            my ($item) = ($dumpformat =~ m{ (KEYWORD|VALUE) }xms);
            last if (! defined $item);
            $dumpformat =~ s{ $item }{%s}xms;
            push @values, $vars{$item};
        }
        ### $dumpformat
        ### @values

        if (! defined $values[1]) {
            $values[1] = '';
        }
        ### @values
        eval "printf(qq($dumpformat), \@values);";
    }
}

if (defined $params{'show-revisioninfo'}) {
    print Data::Dumper->Dump([ $revision_info ], ['revision_info']);
}

exit 0;

__END__

=head1 NAME

vergen - Version management tool

=head1 USAGE

vergen [options] COMMAND

 Options:
   --help                brief help message
   --man                 full documentation
   --format <arg>        print version using format 'arg'
   --directory <dir>     pretend to be in directory 'dir'
   --dump                dump keyword assignments
   --dumpformat <string> use <string> as dump format
                         (default: KEYWORD=VALUE\n)
   --list-formats        lists all user-defined formats available
   --list-keywords       lists all keywords available in formats
   --show-revisioninfo   show all revision information available
   --verbose             show details about information obtained

=head1 DESCRIPTION

B<vergen> is a tool that can help to maintain consistent version numbers
in a project. If desired it uses Subversion and/or Git to determine
revision control information.

Starting from the current directory, vergen searches upwards for its
configuration file '.VERSION_DEFINITION' and reads the first file found
(see B<The .VERSION_DEFINITION File> for a detailed description.)
After the configuration file has been read, vergen again traverses the
directory tree upwards from the current directory and searches for all
file names defined in the version definition file. Once a file is found,
its content is read and stored as the value for the corresponding component.

Components can arbitrarily be combined to 'formats'. A format consists
of a string that may contain any number of custom and built-in component
names.
It is then possible to print the resulting version string using
the --format option.

When used within a directory hierarchy under revision control via
Subversion and/or Git, B<vergen> is able to obtain and use information about
the current checkout.

vergen works best when called within a checked out git repository.
Where this is not possible (e. g. when working on an extracted
tarball without the .git directory) it is possible to include
the complete revision information before creation of the tarball
and archive this file inside the archive.

To do so, do the following in the same directory that contains the
.VERSION_DEFINITION file:

vergen --show-revisioninfo >.vergen_revision_state

If this file is present (and git is not found) subsequent calls to
vergen will use the cached information instead.



=head1 OPTIONS

=over 8

=item B<--format> arg

Print format string, using the version numbers obtained by vergen. The
output format is determined by 'arg' and may either be a symbolic name
referencing a format string in the FORMAT_DEFINITIONS section of
the corresponding .VERSION_DEFINITION file or a custom string specifying
the version number. You may use all keywords defined in .VERSION_DEFINITION
and all conversion specifications as defined by strftime(3).

=item B<--directory> dir

Makes it possible to simulate calling vergen in another directory.
This is identical to calling

  ( cd <dir> && vergen ... )

=item B<--dump>

Dump all keyword definitions in a format that can be sources by sh.

=item B<--dumpformat> string

Use specified string as template for keyword dump. Defaults to
'KEYWORD=VALUE\n'.
Useful dump formats:

  --dumpformat 'KEYWORD="VALUE"\n'
  --dumpformat 'export KEYWORD="VALUE"\n'

=item B<--list-formats>

List all user-defined formats.

=item B<--list-keywords>

List all keywords (can be part of version format of may be referenced directly).

=item B<--show-revisioninfo>

Dump all revision information that could be obtained from revision control
tools.

=item B<--verbose>

Show which files are processed for determination of the version number
components.

=back

=head1 The .VERSION_DEFINITION File

The version definition file contains format and version number component
definitions. It is split in a number of sections that define the composition
of version numbers.

=head2 Format definition

The format definition section is started by the [FORMAT_DEFINITIONS] tag.

Example:
  [FORMAT_DEFINITIONS]
  simple: MAJOR.MINOR
  advanced: MAJOR.MINOR.RELEASE

This configuration defines two named formats called 'simple' and 'advanced'
that can be used with the --format option. The value following the colon
is used as the output format in this case.

=head2 Git reference tags

The git reference tags section is started by the [GIT_REFERENCE_TAGS] tag.

Example:
  [GIT_REFERENCE_TAGS]
  initial: my-root-commit

This section contains a list of reference tag names which are queried
using git --describe --tags --match TAG.

Each tag name can optionally be mapped to a symbolic name (my-root-commit in
the above example). If this mapping is left out it defaults to the tag name.

The git repository is queried via git --describe and the result is made
available to subsequent processing under the name 'git-description-NAME'
with NAME being replaced with the tag-mapping.

=head2 Custom component definitions

Section names are interpreted as a custom version component
definition.
In order to create a custom version number component you should define
the following keys:

=head3 filename

File name to use. vergen searches upwards from the current directory
until it finds the specified file.
The file contents will be assigned as the component value.

=head3 keyword

Symbolic name(s) describing the version component that can be used
to reference its value. More than one keyword can be used for
aliasing the component. Each of these keyword can be used to reference
the component value. It is possible to use the keyword in a format (--format)
or even in the contents of the version component files. The program resolves
recursive references to keywords properly.

Only the first keyword defined is used for --dump.

=head3 optional

If set to 'yes' or 'true', this component is not mandatory.

=head2 Revision control built-ins

When called within a directory tree that is under version control by
Subversion and/or Git it is possible to obtain certain information from the
revision control system.

B<vergen> support Subversion-only operation, Subversion with Git (using
git-svn to rebase the Subversion repository) and Git-only. Git-only mode
also tries to guess Subversion version numbers from the Git log (e. g.
when pulling from a Git repository that uses git-svn to rebase from
Subversion).

=head3 Subversion built-ins

The following named sections are hardcoded
and can be used to access Subversion information.

=head4 revision

'revision' expands to the global SVN revision number

=head4 last-changed-revision

'last-changed-revision' determines the highest "Last Changed Rev"
below the current directory (recursively).


=head3 Git built-ins

The following named sections are hardcoded and can be used to access Git
version information (if current directory is inside a Git repository).

=head4 git-commit-hash

'git-commit-hash' expands to the Git commit hash for the current HEAD.

=head4 git-commit-date

'git-commit-date' expands to the Git commit date for the current HEAD.

=head4 git-abbreviated-commit-hash

'git-abbreviated-commit-hash' expands to the abbreviated commit hash.

=head4 git-tag

'git-tag' expands to the first Git tag name found for the current HEAD,
undefined if the current HEAD is untagged.

=head4 git-tags

'git-tags' expands to the all Git tags found for the current HEAD (comma
separated), undefined if the current HEAD is untagged.

=head4 git-branch

'git-branch' expands to the current branch name (without leading 'refs/heads/')
or the string '(unnamed branch)' in case of a detached head.

=head1 Examples

=head2 .VERSION_DEFINITION

  # Format definitions
  [FORMAT_DEFINITIONS]
  version: MAJOR.MINOR.RELEASESUFFIX\n
  daily_snapshot: %F-MAJOR.MINOR.RELEASESUFFIX\n
  git-commit: MAJOR.MINOR.GIT_TAG

  # Built-in components
  [revision]
  keyword:          SVN_REVISION

  [last-changed-revision]
  keyword:          SVN_LAST_CHANGED_REVISION

  [git-commit-hash]
  keyword:          GIT_COMMIT_HASH

  [git-abbreviated-commit-hash]
  keyword:          GIT_ABBREVIATED_COMMIT_HASH

  [git-tag]
  keyword:          GIT_TAG
  optional:          yes

  [git-tags]
  keyword:          GIT_TAGS
  optional:          yes

  [git-branch]
  keyword:          GIT_BRANCH
  optional:          no

  # Custom components
  [major]
  filename:         .VERSION_MAJOR
  keyword:          MAJOR

  [minor]
  filename:         .VERSION_MINOR
  keyword:          MINOR

  [release]
  filename:         .VERSION_RELEASE
  keyword:          RELEASE

  [suffix]
  filename:         .VERSION_SUFFIX
  keyword:          SUFFIX
  optional:         yes


=head2 Invocation

Using the above configuration file:

  vergen --format daily_snapshot
  vergen --format MAJOR.MINOR
  vergen --dump
